package com.nuc.utils;

import org.jsoup.Connection;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;

import java.io.IOException;

/**
 * @Author Zhang Chao
 * @Date 2021/5/11 15:21
 * @Version 1.0
 */

public class HtmlParseUtil {

    /** Prefix prepended to every scraped {@code href} value when printing. */
    private static final String BASE_URL = "http://aqbw.nuc.edu.cn/";

    /**
     * Downloads the page at {@code http://aqbw.nuc.edu.cn/tzgg.htm}, collects the
     * {@code <a>} and {@code <span>} elements found under elements with class
     * {@code "list"}, and prints one line per link in the form
     * {@code <BASE_URL + href>   <link text>   <date text>}.
     *
     * <p>Links whose text is one of the pager labels (see {@link #isPagerLabel})
     * are skipped. Anchors and spans are paired by position; when there is no
     * span at an anchor's position the date is printed as an empty string
     * instead of failing with an {@link IndexOutOfBoundsException}.
     *
     * @param args unused
     * @throws IOException if the page cannot be fetched or parsed
     */
    public static void main(String[] args) throws IOException {
        String url = "http://aqbw.nuc.edu.cn/tzgg.htm";
        Connection connect = Jsoup.connect(url); // create the connection object
        Document document = connect.get(); // fetch the page and parse it into a Document

        Elements listNodes = document.getElementsByClass("list");
        Elements anchors = listNodes.select("a"); // all <a> tags inside the list
        Elements spans = listNodes.select("span"); // all <span> tags inside the list

        for (int i = 0; i < anchors.size(); i++) {
            Element anchor = anchors.get(i);
            String text = anchor.text(); // link title

            // Skip pager links before touching the spans: they are the entries
            // most likely to have no span at the same index.
            if (isPagerLabel(text)) {
                continue;
            }

            String href = anchor.attr("href"); // link target as written in the page
            // Guard the positional pairing: the page may contain fewer spans than anchors.
            String date = i < spans.size()
                    ? spans.get(i).getElementsByClass("date").text()
                    : "";

            System.out.println(BASE_URL + href + "   " + text + "   " + date);
        }
    }

    /**
     * Returns {@code true} when the link text is one of the pager labels
     * "首页" (first page), "下页" (next page) or "尾页" (last page).
     */
    private static boolean isPagerLabel(String text) {
        return text.equals("首页") || text.equals("下页") || text.equals("尾页");
    }
}
